---
title: "S1 Appendix: Trust and cooperative behavior: Evidence from the realm of data-sharing"
author: ""
date: ""
output: 
  bookdown::pdf_document2:
    toc: no
    keep_tex: true
    number_sections: FALSE
geometry: [top=0.85in,footskip=0.75in]
fontsize: [10pt,letterpaper]
documentclass: article
header-includes:
   - \usepackage{color}
   - \usepackage{caption}
   - \usepackage{float}
   - \usepackage{dcolumn}
   - \usepackage{tabu}
bibliography: references.bib
csl: plos.csl
suppress-bibliography: TRUE
---

\newcommand*{\secref}[1]{Section~\ref{#1}}

```{r setup, include = FALSE}
# Please check INSTRUCTIONS.txt to reproduce the study.

  # Document-wide chunk defaults: figure position "h", no caching.
  library(knitr)
  knitr::opts_chunk$set(cache = FALSE, fig.pos = "h")

  # Fix the random seed, then remove every visible object from the workspace
  # so that the analysis starts from an empty environment.
  set.seed(12345)
  rm(list = ls())
```





```{r packages, message=FALSE, warning=FALSE, include=FALSE}
  library(haven)
  library(plotly)
  library(dplyr)
  library(stargazer)
  library(readr)
  library(stringr)
  library(xtable)
  library(kableExtra)
  library(knitr)
  library(tidyr)
```

```{r include=FALSE}
  # Recoded analysis sample, and the sample as it was before groups were
  # discarded (the latter is used for the randomization figure and total N).
  data <- read_csv("data_recoded.csv") %>% data.frame()
  data_beford_discard <- read_csv("data_beford_discard.csv") %>% data.frame()
```



\setcounter{table}{0}
\renewcommand{\thetable}{\Alph{table}}
\renewcommand{\tablename}{Table}

\setcounter{figure}{0}
\renewcommand\thefigure{\Alph{figure}}
\renewcommand{\figurename}{Fig}




# Summary statistics 

Tables \@ref(tab:summary) and \@ref(tab:table-summary-categorical) provide summary statistics on some socio-demographic variables of our sample.

```{r table-summary-stats, echo=FALSE, message=FALSE, warning=FALSE, paged.print=FALSE, results="asis"}
  # Numeric variables reported in the summary-statistics table.
  summary.stats <- data %>%
    dplyr::select(female,
                  age,
                  trust.researchers,
                  trust.researchers.normal.question,
                  trust.researchers.probability.question,
                  trust.researchers.probability.question.10,
                  cooperative.behavior,
                  number.accounts,
                  number.devices)

  # Build human-readable row labels from the column names.
  # The dots are turned into spaces FIRST and the suffixes are matched as
  # fixed strings afterwards. Doing it the other way round (regex suffixes
  # first, dots last) left a stray "." in front of the 0-10 suffix and then
  # wiped out the dot of the abbreviation "prob.", which produced labels such
  # as "Trust researchers , prob  question 0-10".
  # The longer " probability question 10" suffix must be handled before the
  # shorter " probability question" so that it is not matched partially.
  var.labels <- str_replace_all(names(summary.stats), fixed("."), " ")
  var.labels <- str_replace_all(var.labels, fixed(" normal question"),
                                ", normal question")
  var.labels <- str_replace_all(var.labels, fixed(" probability question 10"),
                                ", prob. question 0-10")
  var.labels <- str_replace_all(var.labels, fixed(" probability question"),
                                ", prob. question")
  # Capitalise the first character of every label.
  var.labels <- sub("(.)", "\\U\\1", var.labels, perl = TRUE)

  # Print the LaTeX table (the chunk uses results = "asis").
  stargazer::stargazer(summary.stats,
                       summary = TRUE,
                       type = "latex",
                       label = "tab:summary",
                       font.size = "footnotesize",
                       table.placement = "H",
                       column.sep.width = "1pt",
                       title = "Summary statistics: Numeric variables",
                       digits = 2,
                       rownames = FALSE,
                       header = FALSE,
                       covariate.labels = var.labels,
                       notes.append = FALSE,
                       notes.align = "l",
                       omit.summary.stat = c("p25", "p75"))


```

```{r table-summary-categorical, echo=FALSE, message=FALSE, warning=FALSE, paged.print=FALSE, results="asis"}
# qwraps2 produces the categorical summary table; magrittr provides the pipe.
library(magrittr)
library(qwraps2)
options(qwraps2_markup = "markdown")

# Distribution of the two categorical variables.
summary.table <- summary_table(
  dplyr::select(data, .data$education, .data$age.cat.fac)
)

# Capture the printed LaTeX so that it can be post-processed line by line.
tex.lines <- capture.output(
  print(summary.table,
        caption = "Summary statistics: Categorical variables",
        markup = "latex",
        rtitle = "Variable",
        cnames = c("Distribution"))
)

# Relabel variables and missing values; strip all rules inserted by the printer.
tex.lines <- gsub("education", "Education (Categories)", tex.lines)
tex.lines <- gsub("age.cat.fac", "Age (Categories)", tex.lines)
tex.lines <- gsub("\\\\hline", "", tex.lines)
tex.lines <- gsub("Unknown", "Missings", tex.lines)

# Positional edits on the captured output: remove the "|" characters on
# line 5, re-insert rules on lines 6, 8 and 28, and switch the float
# placement on line 1 from [t] to [H].
tex.lines[5] <- gsub("\\|", "", tex.lines[5])
tex.lines[6] <- "\\hline\\hline"
tex.lines[c(8, 28)] <- "\\hline"
tex.lines[1] <- gsub("\\[t\\]", "\\[H\\]", tex.lines[1])

cat(tex.lines)
```





# Logistic regression models 

Standard errors and hypothesis tests of linear probability models (LPMs) are normally invalid, since the errors of LPMs violate the assumptions of normality and homoskedasticity [@Scott_Long1997-ci, 38-40]. For this reason, we also provide estimates from logistic regression models in Table \@ref(tab:trust-behavior-lm) below. None of our trust questions (in the pooled sample and in the subsamples that received the normal or the probability question) displays a statistically significant effect on cooperative behavior. We already discussed whether those effects are practically significant, which is not the case. In Table \@ref(tab:table-confidence-intervals), we additionally provide the coefficient estimates with confidence intervals that are based on the profiled log-likelihood function for the models in Table \@ref(tab:trust-behavior-lm). In all models, the 95% intervals include zero.



```{r table-logit-models, echo=FALSE, message=FALSE, warning=FALSE, results="asis"}
# Second estimation sample: respondents whose pooled trust answer is not 5.
data.nomidpoint <- dplyr::filter(data, trust.researchers != 5)

# M7-M9: logistic regressions on the full sample.
fit1 <- glm(cooperative.behavior ~ trust.researchers,
            family = "binomial", data = data)
fit2 <- glm(cooperative.behavior ~ trust.researchers.normal.question,
            family = "binomial", data = data)
fit3 <- glm(cooperative.behavior ~ trust.researchers.probability.question.10,
            family = "binomial", data = data)

# M10-M12: the same three models on the sample without the value 5.
fit4 <- glm(cooperative.behavior ~ trust.researchers,
            family = "binomial", data = data.nomidpoint)
fit5 <- glm(cooperative.behavior ~ trust.researchers.normal.question,
            family = "binomial", data = data.nomidpoint)
fit6 <- glm(cooperative.behavior ~ trust.researchers.probability.question.10,
            family = "binomial", data = data.nomidpoint)

# One LaTeX table with all six models (the chunk uses results = "asis").
stargazer(fit1, fit2, fit3, fit4, fit5, fit6,
          type = "latex",
          title = "Trust and trusting behavior",
          label = "tab:trust-behavior-lm",
          dep.var.labels = c("Cooperative behavior"),
          covariate.labels = c("Trust researchers", "Trust researchers (normal question)", "Trust researchers (prob. question 0-10)"),
          column.labels = c("M7", "M8", "M9", "M10", "M11", "M12"),
          model.names = FALSE,
          model.numbers = FALSE,
          omit.stat = c("LL", "ser", "f", "adj.rsq"),
          ci = FALSE,
          ci.level = 0.95,
          digits = 2,
          star.cutoffs = c(0.05, 0.01, 0.001),
          single.row = FALSE,
          no.space = TRUE,
          align = TRUE,
          column.sep.width = "-15pt",
          font.size = "footnotesize",
          table.placement = "H",
          header = FALSE,
          notes = "\\parbox[t]{4cm}{Logistic regression models.}",
          notes.align = "l"
          )
```



```{r table-confidence-intervals, echo=FALSE, message=FALSE, warning=FALSE}
# Logit models M7-M12, in table order.
logit.fits <- list(fit1, fit2, fit3, fit4, fit5, fit6)

# For every model take row 2 of confint() (both interval bounds) and
# element 2 of coef(), i.e. the entry for the single trust predictor.
ci.bounds <- vapply(logit.fits, function(fit) confint(fit)[2, ], numeric(2))
estimates <- vapply(logit.fits, function(fit) unname(coef(fit)[2]), numeric(1))

# One row per model: lower bound, upper bound, point estimate.
table.confidence <- data.frame(
  Model = paste0("M", 7:12),
  lower = ci.bounds[1, ],
  upper = ci.bounds[2, ],
  Coefficient = estimates
)
names(table.confidence)[2:3] <- c("2.5 %", "97.5 %")

# Round every numeric column to three decimals.
is.num <- vapply(table.confidence, is.numeric, logical(1))
table.confidence[is.num] <- round(table.confidence[is.num], 3)

kable(table.confidence,
      row.names = FALSE,
      caption = 'Confidence intervals',
      format = "latex",
      booktabs = TRUE) %>%
  kable_styling(full_width = TRUE,
                latex_options = c("scale_down", "HOLD_position"),
                font_size = 8)
```



# Correlations 

Table \@ref(tab:correlations) provides Spearman correlations across the variables of interest. The variable 'trust researchers' contains the trust values of all individuals in our sample in which we pool the subsample that answered the standard question and the subsample that answered the probability question.



```{r correlations, echo=FALSE, message=FALSE, warning=FALSE}
# Keep only the variables that enter the correlation matrix
  data.cor <- dplyr::select(data,
                            trust.researchers,
                            trust.researchers.normal.question,
                            trust.researchers.probability.question,
                            cooperative.behavior,
                            trust.generalized)

# Spearman correlations (r), p-values (P) and pairwise N (n)
  rc <- Hmisc::rcorr(as.matrix(data.cor), type = "spearman")
  keep.rows <- 1:5
  cors <- round(rc$r[keep.rows, ], 2)
  Ps <- round(rc$P[keep.rows, ], 2)
  Ns <- round(rc$n[keep.rows, ], 2)

# Build one text cell "r (p; N)" per pair of variables
  cells <- paste0(cors, " (", Ps, "; ", Ns, ")")
  # a missing p-value is dropped from the cell
  cells <- str_replace(cells, "NA; ", "")
  # a missing correlation with N = 0 is shown as "--"
  cells <- str_replace_all(cells, "NA \\(0\\)", "--")
  # a p-value printed as "0" is padded to "0.00"
  cells <- str_replace_all(cells, "0;", "0.00;")
  cors2 <- data.frame(matrix(cells, nrow(cors), ncol(cors)))

# The same labels are used for rows and columns
  var.labels <- c("Trust researchers", "Trust researchers (norm. question)",
                  "Trust researchers (prob. question)", "Cooperative behavior",
                  "Generalized trust")
  names(cors2) <- var.labels
  row.names(cors2) <- var.labels

# Print NA cells as empty strings in kable output
  options(knitr.kable.NA = '')

kable(cors2,
      row.names = TRUE,
      caption = 'Trust and trusting behavior: Spearman correlations',
      format = "latex",
      booktabs = TRUE) %>%
  kable_styling(full_width = TRUE,
                latex_options = c("scale_down", "HOLD_position"),
                font_size = 9) %>%
  footnote(general = c("P-values and number of observations in parentheses;", "-- = no overlap between observations for normal question and probability question;", "correlations estimated on the basis of the Hmisc R package."),
           footnote_as_chunk = FALSE)
```



# Trust and cooperation in sample subsets 



```{r table-1-lpms-in-subsets, echo=FALSE, message=FALSE, warning=FALSE, results="asis"}
# Sample subsets by university degree, gender and age category.
data_university_degree_yes <- data %>% filter(university.degree == 1)
data_university_degree_no <- data %>% filter(university.degree == 0)
data_female_yes <- data %>% filter(female == 1)
data_female_no <- data %>% filter(female == 0)
data_age_below30 <- data %>% filter(age.cat.fac == "Below 30")
data_age_30to50 <- data %>% filter(age.cat.fac == "30 to 49")
data_age_50andhigher <- data %>% filter(age.cat.fac == "50 and higher")

# Explicit registry of the subsets to analyse. This replaces a regex scan of
# ls(), which silently picked up any workspace object whose name happened to
# contain "data_a", "data_f" or "data_u". Entries are kept in alphabetical
# order so that the tables are printed in a fixed, predictable sequence.
subset.data <- list(
  data_age_30to50 = data_age_30to50,
  data_age_50andhigher = data_age_50andhigher,
  data_age_below30 = data_age_below30,
  data_female_no = data_female_no,
  data_female_yes = data_female_yes,
  data_university_degree_no = data_university_degree_no,
  data_university_degree_yes = data_university_degree_yes
)
subsets <- names(subset.data)

# One table with three linear probability models per subset
# (the chunk uses results = "asis", so stargazer prints raw LaTeX).
for (y in subsets) {

  data.subset <- subset.data[[y]]

  # The models get their own names so that the logistic regression objects
  # fit1-fit3 created in the earlier chunk are not overwritten.
  lpm.pooled <- lm(cooperative.behavior ~ trust.researchers,
                   data = data.subset)
  lpm.normal <- lm(cooperative.behavior ~ trust.researchers.normal.question,
                   data = data.subset)
  lpm.prob <- lm(cooperative.behavior ~ trust.researchers.probability.question.10,
                 data = data.subset)

  # "data_age_below30" -> "Age Below30"; used in the title and the table note.
  subset_name <- stringr::str_to_title(gsub("_", " ", gsub("data_", "", y)))

  stargazer(lpm.pooled, lpm.normal, lpm.prob,
            type = "latex",
            title = paste0("Subsample - ", subset_name),
            omit.stat = c("LL", "ser", "f", "adj.rsq"),
            ci = FALSE, digits = 2,
            ci.level = 0.95,
            single.row = FALSE,
            label = paste0("tab:", gsub("_", "-", y)),
            table.placement = "H",
            column.sep.width = "-15pt",
            align = TRUE,
            # exactly one label per model column
            column.labels = c("M1", "M2", "M3"),
            model.names = FALSE,
            model.numbers = FALSE,
            star.cutoffs = c(0.05, 0.01, 0.001), notes.align = "l",
            notes = paste0("\\parbox[t]{4cm}{Linear probability models.", " Sample subset with characteristic: ", subset_name, "}"),
            header = FALSE,
            no.space = TRUE,
            font.size = "footnotesize",
            covariate.labels = c("Trust researchers", "Trust researchers (normal question)", "Trust researchers (prob. question 0-10)"),
            dep.var.labels = c("Cooperative behavior")
            )
}
```



# Wording of survey questions {#sec:questions}

The table below outlines different coding decisions. Importantly, all coding is made transparent in the R Markdown file accompanying the study.

| Variable | Question | Coding  |
|------|---------------------|----------------------------------------|
| Trust researchers (normal question)  | *On a scale from 0 "not at all" to 10 "completely", how much do you trust that university researchers only use your personal data internally, so do not share them with third parties?*  | The trust questions were preceded by an intro: *The next questions deal with the topic of trust. Imagine a scale from 0 to 10. 0 means that you have no trust at all in something, 10 means that you trust something completely.*  | 
| Trust researchers (prob. question 0-10)  | *On a scale of 0% to 100%, how likely do you think it is that university researchers only use your personal data internally, so do not share them with third parties?*  | The trust questions were preceded by an intro: *The next questions deal with future events. Imagine a probability scale from 0% to 100%. 0% means that an event is certain not to happen, 100% means that an event is certain to happen.*; The question was subsequently recoded from values 0-100 to values of 0-10 | 
| Trust researchers | Simply combines the above two questions  | For this variable respondents who answered the normal question and the probability question were joined; Beforehand the probability question was recoded to values from 0-10  | 
| Female | *Are you male or female?*  | female = 1, male = 0  | 
| Age  | *In which year were you born?*  | 2018 - year of birth |
| Education  | *What is your highest level of school education?*  | Categorical variable: Hauptschule = Secondary school; Mittlere Reife = Intermediate school-leaving certificate; Fachhochschulreife = Advanced technical college certificate; Abitur = Higher school certificate |
| General privacy concern  | *In general, how concerned are you about your privacy?*  | Categorical variable: Not at all concerned, A little concerned, Somewhat concerned, Very concerned; The variable is recoded to numeric values 0-3.  |
| Number of devices  | *Which of the following devices do you own?*  | Categorical variable: Yes, No; This question was asked for Smartphone/Cell phone/Desktop computer (PC or a laptop)/Tablet/eBook reader; Subsequently, the yes answers across these device categories were summed up; If an answer was missing on one of the questions, the overall variable 'number of devices' was coded as missing |
| Number of accounts  | *Do you currently have an account with a user name and password with the following services?*  | Categorical variable: Yes, I currently have an account, No, I never had an account, No, not currently, but previously; This question was asked for Google (including Gmail)/Facebook/Twitter/LinkedIn/Xing; Subsequently, the yes answers across these services were summed up; If an answer was missing on one of the questions, the overall variable 'number of accounts' was coded as missing |

Table: Wording of survey questions\label{tab:surveyquestions}



# Distribution of responses to different trust questions 

Fig \@ref(fig-generalized-trust) visualizes the distribution of answers to the generalized trust question.

```{r fig-generalized-trust, fig.cap="Descriptive statistics of the sample", fig.height=4, message=FALSE, warning=FALSE, include=FALSE}

# Frequency of every answer on the generalized trust item; the answers are
# stored as an ordered factor so the bars follow the scale order 0-10.
trust.counts <- table(data$trust.generalized)
bar.data <- data.frame(
  x = factor(names(trust.counts), levels = as.character(0:10), ordered = TRUE),
  y = as.numeric(trust.counts)
)

# Black bar chart with identical font sizes for axis titles and tick labels.
axis.font <- list(size = 16)
p <- plot_ly(bar.data, x = ~x, y = ~y, type = "bar",
             marker = list(color = "black"))
p <- layout(p,
            yaxis = list(title = "N", dtick = 50,
                         titlefont = axis.font, tickfont = axis.font),
            xaxis = list(title = "Generalized trust (most people)",
                         titlefont = axis.font, tickfont = axis.font),
            bargap = 0.1)

# Export as PDF; the file is included by the LaTeX figure environment below.
orca(p, "fig3.pdf", format = "pdf")
```


\begin{figure}[H]
\centering
\caption{Distribution of generalized trust}\label{fig-generalized-trust}
		\includegraphics[width=0.9\linewidth]{fig3.pdf}
\end{figure}

Fig \@ref(fig-trust-scales-all) provides the distributions of trust in three other trustee categories --- Google, Facebook and the Federal Statistics Office --- alongside trust in researchers (panel d), all measured with the normal trust question. It immediately becomes clear that trust is distributed very differently for different types of trustees.



```{r fig-distribution-trust-scales, echo=FALSE, message=FALSE, warning=FALSE, fig.cap="Trust distributions for other trustees", fig.height=11}

# Trust items shown in panels (a)-(d), in panel order, with their x-axis titles.
vars <- c("trust.google.normal.question", "trust.facebook.normal.question", "trust.fedoffstats.normal.question", "trust.researchers.normal.question")
panel.titles <- c("(a) Trust: Google (normal question)",
                  "(b) Trust: Facebook (normal question)",
                  "(c) Trust: Federal Statistics Office (normal question)",
                  "(d) Trust: Researchers (normal question)")

# Bar chart of the answer frequencies of one trust item.
#   var.name:   name of the column of `data` to tabulate
#   axis.title: x-axis title of the panel
# Returns a plotly object. All panels share the same y-axis range (0-225)
# and tick spacing so that they can be compared directly.
trust_panel <- function(var.name, axis.title) {
  counts <- table(data[, var.name])
  # ordered factor so that the bars follow the scale order 0-10
  bar.data <- data.frame(
    x = factor(names(counts), levels = as.character(0:10), ordered = TRUE),
    y = as.numeric(counts)
  )
  axis.font <- list(size = 14)

  plot_ly(data = bar.data,
          x = ~x,
          y = ~y,
          type = "bar",
          marker = list(color = "black")) %>%
    layout(yaxis = list(title = "N", dtick = 25, range = c(0, 225),
                        titlefont = axis.font, tickfont = axis.font),
           xaxis = list(title = axis.title,
                        titlefont = axis.font, tickfont = axis.font),
           bargap = 0.1)
}

# One panel per trust item (replaces four copy-pasted plotting blocks).
panels <- unname(Map(trust_panel, vars, panel.titles))

# Arrange the panels in a 2 x 2 grid and export the grid as PDF; the file is
# included by the LaTeX figure environment below.
p.sm <- subplot(panels, nrows = 2, shareX = FALSE, shareY = FALSE,
                titleX = TRUE, titleY = TRUE, margin = 0.05) %>%
  layout(showlegend = FALSE, autosize = TRUE, height = 700, width = 800)
orca(p.sm, "fig4.pdf", format = "pdf", height = 700, width = 800)
```


\begin{figure}[H]
\centering
\caption{Distribution of trust across different trustees}\label{fig-trust-scales-all}
		\includegraphics[width=0.9\linewidth]{fig4.pdf}
\end{figure}





# Randomization 
  
Fig \@ref(fig-randomization) depicts the distribution across the four groups. The total N was `r nrow(data_beford_discard)` before the data of Groups 3 and 4 were discarded for our analysis.

```{r fig-randomization, fig.cap="Randomization", fig.height=5, message=FALSE, warning=FALSE, include=FALSE}
# Number of respondents per value of random_id in the sample before discarding.
group.counts <- table(data_beford_discard$random_id)
bar.data <- data.frame(x = names(group.counts),
                       y = as.numeric(group.counts))

# Black bar chart with identical font sizes for axis titles and tick labels.
axis.font <- list(size = 16)
p <- plot_ly(bar.data, x = ~x, y = ~y, type = "bar",
             marker = list(color = "black"))
p <- layout(p,
            yaxis = list(title = "N",
                         titlefont = axis.font, tickfont = axis.font),
            xaxis = list(title = "Randomization",
                         titlefont = axis.font, tickfont = axis.font),
            bargap = 0.1)

# Export as PDF; the file is included by the LaTeX figure environment below.
orca(p, file = "fig5.pdf", format = "pdf")
```


\begin{figure}[H]
\centering
\caption{Randomization}\label{fig-randomization}
		\includegraphics[width=0.9\linewidth]{fig5.pdf}
\begin{flushleft}
\end{flushleft}
\end{figure}




# R session info 

```{r echo=FALSE}
# Print R version, platform and attached/loaded packages for reproducibility.
# The argument is spelled out as `locale` (the original `local` only worked
# through partial argument matching); FALSE omits the locale section.
print(sessionInfo(), locale = FALSE)
```

# References

1. Long JS. Regression models for categorical and limited dependent variables. Thousand Oaks, London, New Delhi: Sage Publications; 1997.

